Goal
Predict the next 3 months of daily quantities of vehicle parts ordered.
1 . Set Up
1.1 Load Libraries
# Main Packages
library(tidyverse)
library(lubridate)
library(tidyquant)
library(plotly)
library(ggthemes)
# Time Based Feature Extraction
library(timetk)
# Holidays
library(timeDate)
# Weather Data
library(riem)
# Forecasting
library(forecast)
library(sweep)
# Machine Learning
library(parsnip)
library(yardstick)
library(glmnet)
1.2 Load Data
# Load data
sales_data_raw <- read_csv('sales_data_sample.csv')
Parsed with column specification:
cols(
.default = col_character(),
ORDERNUMBER = [32mcol_double()[39m,
QUANTITYORDERED = [32mcol_double()[39m,
PRICEEACH = [32mcol_double()[39m,
ORDERLINENUMBER = [32mcol_double()[39m,
SALES = [32mcol_double()[39m,
QTR_ID = [32mcol_double()[39m,
MONTH_ID = [32mcol_double()[39m,
YEAR_ID = [32mcol_double()[39m,
MSRP = [32mcol_double()[39m
)
See spec(...) for full column specifications.
data <- sales_data_raw
data
2. Data Preprocessing
2.1 Parts Table
#
parts_d_tbl_all <- data %>%
select(ORDERDATE, ORDERNUMBER, QUANTITYORDERED, PRICEEACH,
PRODUCTLINE, PRODUCTCODE, STATUS) %>%
mutate(ORDERDATE = mdy_hm(ORDERDATE),
ORDERDATE = as_datetime(ORDERDATE)) %>%
group_by(ORDERDATE, PRODUCTCODE) %>%
summarise(QUANTITYORDERED)
parts_d_tbl <- parts_d_tbl_all %>%
group_by(ORDERDATE) %>%
summarise(TOTAL = sum(QUANTITYORDERED))
2.2 Quantity Ordered Over Time
# Plot it
g <- parts_d_tbl %>%
ggplot(aes(ORDERDATE, TOTAL)) +
geom_line(alpha = 0.5, color = '#2c3e50') +
geom_smooth(method = "loess", span = 0.5) +
theme_tq() +
scale_color_tq() +
labs(x="",y="Quantity",title=" Quantity Ordered Over Time")
ggplotly(g) %>%
layout(xaxis = list(rangeslider = list(type = "date")))
Note the seasonality in the data.
2.3 Data Augmentation
- Add time-based features using
timetk
- Add holidays using
timedate
# Add time-based features
parts_d_tbl <- parts_d_tbl %>%
tk_augment_timeseries_signature() %>%
select(ORDERDATE, TOTAL, index.num, year, half, quarter, month.lbl, day, wday.lbl)
# Create holidays
holidays <- holidayNYSE(year = c(2003, 2004, 2005)) %>% ymd()
# Add holidays feature
parts_d_tbl <- parts_d_tbl %>%
mutate(holiday = case_when(
ORDERDATE %in% holidays ~ 1,
TRUE ~ 0
))
3. Exploratory Analysis
3.1 Autocorrelation
autocorrelate <- function(data, value, lags = 0:20) {
value_expr <- enquo(value)
acf_values <- data %>%
select(!! value_expr) %>%
pull() %>%
acf(lag.max = tail(lags, 1), plot = FALSE) %>%
.$acf %>% # Remove text
.[,,1]
ret <- tibble(acf = acf_values) %>%
rowid_to_column(var = "lag") %>% # Add lag columns
mutate(lag = lag - 1) %>%
filter(lag %in% lags)
return(ret)
}
g <- parts_d_tbl %>%
autocorrelate(TOTAL, lags = 0:nrow(.)) %>%
ggplot(aes(lag, acf)) +
geom_point(alpha = 0.5, color = "#2c3e50") +
expand_limits(y = c(-1, 1)) +
theme_tq() +
labs(title = "Autocorrelation")
ggplotly(g)
3. Modeling
3.1 Train-Test-Split
train_test_split_date <- ymd("2005-03-01")
train_tbl <- parts_d_tbl %>%
filter(ORDERDATE < train_test_split_date)
test_tbl <- parts_d_tbl %>%
filter(ORDERDATE >= train_test_split_date)
# What Autocorrelation can we use in a multivariate model?
nrow(test_tbl)
[1] 30
3.2 Evaluate 60-Day Moving Average
moving_average_train_tbl <- train_tbl %>%
select(ORDERDATE, TOTAL) %>%
mutate(moving_average = rollmean(TOTAL, k = 30, na.pad = TRUE, align = "right"))
g <- moving_average_train_tbl %>%
bind_rows(test_tbl %>% select(ORDERDATE, TOTAL)) %>%
fill(moving_average, .direction = "down") %>%
mutate(ORDERDATE = as.Date(as.character(ORDERDATE))) %>%
ggplot(aes(ORDERDATE, TOTAL)) +
geom_vline(xintercept = train_test_split_date, color = "red") +
geom_point(color = "#2c3e50") +
geom_line(aes(y = moving_average), size = 1, color = "blue") +
theme_tq()
ggplotly(g)
test_tbl %>%
select(TOTAL) %>%
mutate(moving_average = moving_average_train_tbl %>%
tail(1) %>%
pull(moving_average)) %>%
mae(TOTAL, moving_average)
LS0tCnRpdGxlOiAiVGltZSBTZXJpZXMgRm9yZWNhc3RpbmciCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMgR29hbAoKUHJlZGljdCB0aGUgbmV4dCAzIG1vbnRocyBvZiBkYWlseSBxdWFudGl0aWVzIG9mIHZlaGljbGUgcGFydHMgb3JkZXJlZC4gCgojIDEgLiBTZXQgVXAKCiMjIDEuMSBMb2FkIExpYnJhcmllcwoKYGBge3J9CiMgTWFpbiBQYWNrYWdlcwpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShsdWJyaWRhdGUpCmxpYnJhcnkodGlkeXF1YW50KQpsaWJyYXJ5KHBsb3RseSkKbGlicmFyeShnZ3RoZW1lcykKCiMgVGltZSBCYXNlZCBGZWF0dXJlIEV4dHJhY3Rpb24KbGlicmFyeSh0aW1ldGspCgojIEhvbGlkYXlzCmxpYnJhcnkodGltZURhdGUpCgojIFdlYXRoZXIgRGF0YQpsaWJyYXJ5KHJpZW0pCgojIEZvcmVjYXN0aW5nCmxpYnJhcnkoZm9yZWNhc3QpCmxpYnJhcnkoc3dlZXApCgojIE1hY2hpbmUgTGVhcm5pbmcgCmxpYnJhcnkocGFyc25pcCkKbGlicmFyeSh5YXJkc3RpY2spCmxpYnJhcnkoZ2xtbmV0KQpgYGAKCiMjIDEuMiBMb2FkIERhdGEKCmBgYHtyfQojIExvYWQgZGF0YQpzYWxlc19kYXRhX3JhdyA8LSByZWFkX2Nzdignc2FsZXNfZGF0YV9zYW1wbGUuY3N2JykgCmRhdGEgPC0gc2FsZXNfZGF0YV9yYXcKZGF0YQpgYGAKCgojIDIuIERhdGEgUHJlcHJvY2Vzc2luZwoKIyMgMi4xIFBhcnRzIFRhYmxlCgpgYGB7cn0KIyAKcGFydHNfZF90YmxfYWxsIDwtIGRhdGEgJT4lCiAgc2VsZWN0KE9SREVSREFURSwgT1JERVJOVU1CRVIsIFFVQU5USVRZT1JERVJFRCwgUFJJQ0VFQUNILCAKICAgICAgICAgUFJPRFVDVExJTkUsIFBST0RVQ1RDT0RFLCBTVEFUVVMpICU+JQogIG11dGF0ZShPUkRFUkRBVEUgPSBtZHlfaG0oT1JERVJEQVRFKSwKICAgICAgICAgT1JERVJEQVRFID0gYXNfZGF0ZXRpbWUoT1JERVJEQVRFKSkgJT4lCiAgZ3JvdXBfYnkoT1JERVJEQVRFLCBQUk9EVUNUQ09ERSkgJT4lCiAgc3VtbWFyaXNlKFFVQU5USVRZT1JERVJFRCkKCnBhcnRzX2RfdGJsIDwtIHBhcnRzX2RfdGJsX2FsbCAlPiUKICBncm91cF9ieShPUkRFUkRBVEUpICU+JQogIHN1bW1hcmlzZShUT1RBTCA9IHN1bShRVUFOVElUWU9SREVSRUQpKQpgYGAKCiMjIDIuMiBRdWFudGl0eSBPcmRlcmVkIE92ZXIgVGltZQoKYGBge3J9CiMgUGxvdCBpdApnIDwtIHBhcnRzX2RfdGJsICU+JQogIGdncGxvdChhZXMoT1JERVJEQVRFLCBUT1RBTCkpICsgCiAgZ2VvbV9saW5lKGFscGhhID0gMC41LCBjb2xvciA9ICcjMmMzZTUwJykgKyAKICBnZW9tX3Ntb290aChtZXRob2QgPSAibG9lc3MiLCBzcGFuID0gMC41KSArIAogIHRoZW1lX3RxKCkgKyAKICBzY2FsZV9jb2xvcl90cSgpICsgCiAgbGFicyh4PSIiLHk9IlF1YW50aXR5Iix0aXRsZT0iIFF1YW50aXR5IE9yZGVyZWQgT3ZlciBUaW1lIikKCmdncGxvdGx5KGcpICU+JQogIGxheW91dCh4YXhpcyA9IGxpc3QocmFuZ2VzbGlkZXIgPSBsaXN0KHR5cGUgPSAiZGF0ZSIpKSkKYGBgCgo+IE5vdGUgdGhlIHNlYXNvbmFsaXR5IGluIHRoZSBkYXRhLiAKCiMjIDIuMyBEYXRhIEF1Z21lbnRhdGlvbgoKLSBBZGQgdGltZS1iYXNlZCBmZWF0dXJlcyB1c2luZyBgdGltZXRrYAotIEFkZCBob2xpZGF5cyB1c2luZyBgdGltZWRhdGVgCgpgYGB7cn0KIyBBZGQgdGltZS1iYXNlZCBmZWF0dXJlcwpwYXJ0c19kX3RibCA8LSBwYXJ0c19kX3RibCAlPiUKICB0a19hdWdtZW50X3RpbWVzZXJpZXNfc2lnbmF0dXJlKCkgJT4lCiAgc2VsZWN0KE9SREVSREFURSwgVE9UQUwsIGluZGV4Lm51bSwgeWVhciwgaGFsZiwgcXVhcnRlciwgbW9udGgubGJsLCBkYXksIHdkYXkubGJsKQoKIyBDcmVhdGUgaG9saWRheXMKaG9saWRheXMgPC0gaG9saWRheU5ZU0UoeWVhciA9IGMoMjAwMywgMjAwNCwgMjAwNSkpICU+JSB5bWQoKQoKIyBBZGQgaG9saWRheXMgZmVhdHVyZQpwYXJ0c19kX3RibCA8LSBwYXJ0c19kX3RibCAlPiUKICBtdXRhdGUoaG9saWRheSA9IGNhc2Vfd2hlbigKICAgIE9SREVSREFURSAlaW4lIGhvbGlkYXlzIH4gMSwgCiAgICBUUlVFIH4gMAogICkpCmBgYAoKIyAzLiBFeHBsb3JhdG9yeSBBbmFseXNpcwoKIyMgMy4xIEF1dG9jb3JyZWxhdGlvbgoKCmBgYHtyfQphdXRvY29ycmVsYXRlIDwtIGZ1bmN0aW9uKGRhdGEsIHZhbHVlLCBsYWdzID0gMDoyMCkgewogICAgCiAgICB2YWx1ZV9leHByIDwtIGVucXVvKHZhbHVlKQogICAgCiAgICBhY2ZfdmFsdWVzIDwtIGRhdGEgJT4lCiAgICAgICAgc2VsZWN0KCEhIHZhbHVlX2V4cHIpICU+JQogICAgICAgIHB1bGwoKSAlPiUKICAgICAgICBhY2YobGFnLm1heCA9IHRhaWwobGFncywgMSksIHBsb3QgPSBGQUxTRSkgJT4lCiAgICAgICAgLiRhY2YgJT4lICMgUmVtb3ZlIHRleHQKICAgICAgICAuWywsMV0KICAgIAogICAgcmV0IDwtIHRpYmJsZShhY2YgPSBhY2ZfdmFsdWVzKSAlPiUKICAgICAgICByb3dpZF90b19jb2x1bW4odmFyID0gImxhZyIpICU+JSAjIEFkZCBsYWcgY29sdW1ucwogICAgICAgIG11dGF0ZShsYWcgPSBsYWcgLSAxKSAlPiUgCiAgICAgICAgZmlsdGVyKGxhZyAlaW4lIGxhZ3MpIAogICAgCiAgICByZXR1cm4ocmV0KQp9IAoKZyA8LSBwYXJ0c19kX3RibCAlPiUKICAgIGF1dG9jb3JyZWxhdGUoVE9UQUwsIGxhZ3MgPSAwOm5yb3coLikpICU+JQogICAgZ2dwbG90KGFlcyhsYWcsIGFjZikpICsKICAgIGdlb21fcG9pbnQoYWxwaGEgPSAwLjUsIGNvbG9yID0gIiMyYzNlNTAiKSArCiAgICBleHBhbmRfbGltaXRzKHkgPSBjKC0xLCAxKSkgKwogICAgdGhlbWVfdHEoKSArCiAgICBsYWJzKHRpdGxlID0gIkF1dG9jb3JyZWxhdGlvbiIpCgpnZ3Bsb3RseShnKQpgYGAKCiMjIDMuIE1vZGVsaW5nCgojIyAzLjEgVHJhaW4tVGVzdC1TcGxpdAoKYGBge3J9CnRyYWluX3Rlc3Rfc3BsaXRfZGF0ZSA8LSB5bWQoIjIwMDUtMDMtMDEiKQoKdHJhaW5fdGJsIDwtIHBhcnRzX2RfdGJsICU+JQogICAgZmlsdGVyKE9SREVSREFURSA8IHRyYWluX3Rlc3Rfc3BsaXRfZGF0ZSkKCnRlc3RfdGJsIDwtIHBhcnRzX2RfdGJsICU+JQogICAgZmlsdGVyKE9SREVSREFURSA+PSB0cmFpbl90ZXN0X3NwbGl0X2RhdGUpCgojIFdoYXQgQXV0b2NvcnJlbGF0aW9uIGNhbiB3ZSB1c2UgaW4gYSBtdWx0aXZhcmlhdGUgbW9kZWw/Cm5yb3codGVzdF90YmwpCmBgYAoKCiMjIDMuMiBFdmFsdWF0ZSA2MC1EYXkgTW92aW5nIEF2ZXJhZ2UKCmBgYHtyfQptb3ZpbmdfYXZlcmFnZV90cmFpbl90YmwgPC0gdHJhaW5fdGJsICU+JQogICAgc2VsZWN0KE9SREVSREFURSwgVE9UQUwpICU+JQogICAgbXV0YXRlKG1vdmluZ19hdmVyYWdlID0gcm9sbG1lYW4oVE9UQUwsIGsgPSAzMCwgbmEucGFkID0gVFJVRSwgYWxpZ24gPSAicmlnaHQiKSkgCgpnIDwtIG1vdmluZ19hdmVyYWdlX3RyYWluX3RibCAlPiUKICAgIGJpbmRfcm93cyh0ZXN0X3RibCAlPiUgc2VsZWN0KE9SREVSREFURSwgVE9UQUwpKSAlPiUKICAgIGZpbGwobW92aW5nX2F2ZXJhZ2UsIC5kaXJlY3Rpb24gPSAiZG93biIpICU+JQogICAgbXV0YXRlKE9SREVSREFURSA9IGFzLkRhdGUoYXMuY2hhcmFjdGVyKE9SREVSREFURSkpKSAlPiUKICAgIGdncGxvdChhZXMoT1JERVJEQVRFLCBUT1RBTCkpICsKICAgIGdlb21fdmxpbmUoeGludGVyY2VwdCA9IHRyYWluX3Rlc3Rfc3BsaXRfZGF0ZSwgY29sb3IgPSAicmVkIikgKwogICAgZ2VvbV9wb2ludChjb2xvciA9ICIjMmMzZTUwIikgKwogICAgZ2VvbV9saW5lKGFlcyh5ID0gbW92aW5nX2F2ZXJhZ2UpLCBzaXplID0gMSwgY29sb3IgPSAiYmx1ZSIpICsKICAgIHRoZW1lX3RxKCkgCgpnZ3Bsb3RseShnKQoKdGVzdF90YmwgJT4lCiAgICBzZWxlY3QoVE9UQUwpICU+JQogICAgbXV0YXRlKG1vdmluZ19hdmVyYWdlID0gbW92aW5nX2F2ZXJhZ2VfdHJhaW5fdGJsICU+JSAKICAgICAgICAgICAgICAgdGFpbCgxKSAlPiUgCiAgICAgICAgICAgICAgIHB1bGwobW92aW5nX2F2ZXJhZ2UpKSAlPiUKICAgIG1hZShUT1RBTCwgbW92aW5nX2F2ZXJhZ2UpCmBgYAoKCgoKCgoKCgoKCgoKCgoKCgoKCgo=